// How to run (from a shell):
//   cd /projects/data_commons/cw_code/
//   qstata cw_ind_2_naics_imp.do &

// Project directory globals. dir_cw and dir_lbd are built from dir_proj;
// because dir_proj already ends in "/", the expanded paths contain "//".
global dir_proj "/projects/data_commons/"
global dir_cw   "${dir_proj}/cw/"
global dir_lbd  "${dir_proj}/lbd/"

// Log the start time of the run.
display "Started at ${S_DATE} ${S_TIME}"

// All relative file names below are resolved against the crosswalk directory.
cd "${dir_cw}/"

//==============================================================================

// Load the raw crosswalk CSV (first row holds the variable names) and keep
// only the three columns used below.
import delimited using "cw_ind_fk_raw_man.csv", varnames(1) clear
keep ch_ind fk_naics ch_ind_new

// Store ch_ind as a double and round it to units of 0.1.
recast double ch_ind
replace ch_ind = round(ch_ind, 0.1)

// Print the rows that have no ch_ind to the log, then discard them.
list if ch_ind == .
drop if ch_ind == .

// fk_naics becomes the merge key "naics"; in_fk marks rows that came from
// this CSV (it is missing for rows added later by the merge).
rename fk_naics naics
generate in_fk = 1

// Combine with the existing crosswalk on naics. Rows found only in the using
// file have in_fk missing.
merge 1:1 naics using "cw_ind_naics"

// Run the helper do-file, then apply f_jsung_naics_fix to the naics variable.
// NOTE(review): f_jsung_naics_fix is presumably defined by that do-file;
// its effect on the data is not visible here -- confirm.
do "/projects/data_commons/cw_code/cw_ind_1_jsung_fix.do"
f_jsung_naics_fix "naics"

// ind_change = number of rows within each ch_ind that carry a ch_ind_new.
// NOTE(review): grouping by ch_ind relies on values from cw_ind_naics being
// stored exactly like the rounded doubles created from the CSV -- confirm.
bysort ch_ind (naics): egen ind_change = total(ch_ind_new != .)

// Flag rows that did not come from the CSV but whose ch_ind group has at
// least one row with a ch_ind_new.
generate flag = 1 if in_fk == . & ind_change > 0

tabulate flag

// When every non-missing ch_ind_new within a ch_ind group is the same value,
// copy that value to the flagged rows of the group.

// Mark the first row of each (ch_ind, ch_ind_new) pair that has a ch_ind_new,
// so that summing the markers counts distinct ch_ind_new values per ch_ind.
bysort ch_ind ch_ind_new: generate first_of_new = 1 if ch_ind_new != . & _n == 1
by ch_ind: egen n_distinct_new = total(first_of_new)

// For groups with exactly one distinct ch_ind_new, the mode is that value.
by ch_ind: egen sole_new_ind = mode(ch_ind_new) if n_distinct_new == 1

replace ch_ind_new = sole_new_ind if sole_new_ind != . & flag == 1

// Flagged rows that still have no ch_ind_new after the automatic assignment.
tabulate flag if ch_ind_new == .

sort ch_ind naics

// Where a ch_ind_new is available it overrides ch_ind; other rows keep
// their existing ch_ind.
replace ch_ind = ch_ind_new if ch_ind_new != .

// The output holds only the two crosswalk columns, ordered by naics.
keep ch_ind naics
order ch_ind naics
sort naics

// Write the result as a current-format .dta, a version-12 .dta, and a CSV.
save "cw_ind_naics_imp", replace
saveold "cw_ind_naics_imp_v12", version(12) replace
export delimited using "cw_ind_naics_imp.csv", replace

// Log the end time of the run.
display "Ended at ${S_DATE} ${S_TIME}"
// End of do file
